# encoding: utf-8

import os
import json
from glob import glob

import imagehash
from PIL import Image
from tqdm import tqdm

# Glob pattern selecting the JPEG files that are scanned for duplicates.
img_path = "./imgs/origin/*.jpg"

# Registry of hashes seen so far: image hash -> path of the first file that
# produced it. Filled in by get_hash() as files are processed.
hash_id = {}


def get_hash(img_file_path: str):
    """Return the path of an earlier image with the same dHash, or ``False``.

    The difference hash (``imagehash.dhash``) of the image is looked up in
    the module-level ``hash_id`` registry. If the hash has not been seen
    before, it is recorded with ``img_file_path`` as its first owner.

    Args:
        img_file_path: Path of the image file to hash.

    Returns:
        The path previously registered under the same hash when this image
        is a duplicate; ``False`` when the hash is new (and has now been
        registered) or when the image could not be opened/hashed.
    """
    import logging  # function-scope import keeps this block self-contained

    try:
        # The context manager closes the underlying file as soon as the hash
        # is computed, so long scans do not accumulate open file handles.
        with Image.open(img_file_path) as img:
            d_hash = imagehash.dhash(img)
    except Exception as exc:
        # Best-effort scan: an unreadable or corrupt file must not abort the
        # whole run, but it should be reported instead of silently ignored.
        logging.getLogger(__name__).warning(
            "skipping %s: %s", img_file_path, exc
        )
        return False

    origin_path = hash_id.get(d_hash)
    if origin_path is not None:
        return origin_path

    # First time this hash is seen: remember which file it belongs to.
    hash_id[d_hash] = img_file_path
    return False


# Collect the candidate images as absolute paths. glob() returns matches in
# a filesystem-dependent order, so sort them: this makes the scan order, and
# therefore which file of a duplicate pair is treated as the original,
# reproducible across runs and machines.
img_list = sorted(os.path.abspath(img_file) for img_file in glob(img_path))

# One entry per detected duplicate: "a" is the duplicate, "b" is the file
# that was registered first under the same hash.
repeat = []

for img_ in tqdm(img_list):
    old_img = get_hash(img_)
    if old_img:
        repeat.append({"a": img_, "b": old_img})

# ensure_ascii=False writes non-ASCII path characters verbatim, so the file
# encoding is pinned to UTF-8 rather than left to the platform default
# (which may be unable to encode them).
with open("./repeat_img.txt", "w", encoding="utf-8") as f:
    f.writelines(
        json.dumps(one, ensure_ascii=False) + "\n" for one in repeat
    )
